Loading packages for the plots
library(ggplot2)
library(plotly)
library(flexdashboard)
library(dplyr)
library(tidyverse)
library(leaflet)
library(knitr)
# Read the raw 2007 PRAMS CSV extracts.
alcohol_data_2007 <- read_csv("./data/PRAM_2007_alcohol.csv")
tobacco_data_2007 <- read_csv("./data/PRAM_2007_tobacco.csv")
no_contraception_data_2007 <- read_csv("./data/PRAM_2007_no_contraception.csv")
infant_mortality_df <- read_csv("./data/PRAM_2007_infantmortality.csv")
maternal_race <- read_csv("./data/PRAM_2007_Maternal_Race.csv")
# Alcohol data: standardise column names, keep only responses that describe
# alcohol use, and turn the geolocation string into numeric coordinates.
cleaned_alc_2007 <- alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, data_value_type)) |>
  # Exclude quitters, non-drinkers and plain "NO" answers.
  filter(!response %in% c("DRINKER WHO QUIT", "NONDRINKER", "NO")) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  # Strip the parentheses left over from the split, then convert to numeric.
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# Tobacco data: standardise column names, keep only responses that describe
# tobacco use, and turn the geolocation string into numeric coordinates.
cleaned_tobac_2007 <- tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  # Exclude quitters, non-smokers, zero-cigarette and plain "NO" answers.
  filter(
    !response %in% c("SMOKER WHO QUIT", "NONSMOKER", "None (0 cig)", "NO")
  ) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  # Strip the parentheses left over from the split, then convert to numeric.
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# Maternal race data: standardise column names, drop rows without a response
# or geolocation, and turn the geolocation string into numeric coordinates.
cleaned_mat_race <- maternal_race |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, data_value_type)) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  # Strip the parentheses left over from the split, then convert to numeric.
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# Read the raw 2007 CSV extracts for the no-alcohol, no-tobacco and
# contraception tables.
no_alcohol_data_2007 <- read_csv("./data/PRAM_2007_no_alcohol.csv")
no_tobacco_data_2007 <- read_csv("./data/PRAM_2007_no_tobacco.csv")
contraception_data_2007 <- read_csv("./data/PRAM_2007_contraception.csv")
# No-alcohol data: standardise column names, drop the columns that are not
# used, and remove rows with a missing response.
cleaned_no_alc_2007 <- no_alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  drop_na(response)
# Open the cleaned table in the data viewer for a manual check.
view(cleaned_no_alc_2007)
# No-tobacco data: standardise column names, drop the columns that are not
# used, and remove rows with a missing response.
cleaned_no_tobacco_2007 <- no_tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  drop_na(response)
# Infant mortality data: standardise column names, drop the columns that are
# not used, remove rows without a response or geolocation, and turn the
# geolocation string into numeric coordinates.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    -c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  # Strip the parentheses left over from the split, then convert to numeric.
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# Contraception data: standardise column names, drop the columns that are not
# used, and remove the "YES" / "YES (CHECKED)" responses and missing responses.
cleaned_contraception_2007 <- contraception_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!response %in% c("YES (CHECKED)", "YES")) |>
  drop_na(response)
# No-contraception data: standardise column names, remove rows with a missing
# response, and turn the geolocation string into numeric coordinates.
# NOTE(review): only `response` is checked for NA here, so rows with a missing
# geolocation are kept and end up with NA coordinates — confirm this is
# intended, since the other geocoded tables also drop missing geolocations.
cleaned_no_contra_2007 <- no_contraception_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  drop_na(response) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  # Strip the parentheses left over from the split, then convert to numeric.
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
Plot 1: Alcohol Consumption in relation to Infant Mortality
# Bar chart for the alcohol data: number of records per question, with one
# dodged bar per response.
cleaned_alc_2007 |>
  ggplot(aes(x = question, fill = response)) +
  geom_bar(position = "dodge") +
  # Labels are set once. geom_bar() puts the number of rows on the y axis,
  # hence the "Count" label.
  labs(
    title = "Questions vs Response of Alcohol Consumption",
    x = "Question",
    y = "Count"
  ) +
  theme_minimal() +
  # Tilt the question text so long labels do not run into each other.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Question/response combinations present in the alcohol data and in the
# infant mortality data, drawn on the same axes.
# Colour is mapped inside aes() to a constant label so that ggplot builds a
# legend telling the two datasets apart.
ggplot() +
  geom_point(
    data = cleaned_alc_2007,
    aes(x = question, y = response, color = "Alcohol consumption"),
    size = 3
  ) +
  geom_point(
    data = cleaned_infant_mortality,
    aes(x = question, y = response, color = "Infant mortality"),
    size = 3
  ) +
  scale_color_manual(
    name = "Dataset",
    values = c("Alcohol consumption" = "blue", "Infant mortality" = "red")
  ) +
  labs(
    title = "Questions and Responses: Alcohol Consumption vs Infant Mortality",
    x = "Question",
    y = "Response"
  ) +
  theme_minimal()

Plot 2: Tobacco Consumption in relation to Infant Mortality
Plot 3: No Consumption in relation to Infant Mortality
# Interactive map of the alcohol records: one circle marker per row.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_alc_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr, # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response) # click popup: the row's response
  )
# Interactive map of the tobacco records: one circle marker per row.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_tobac_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr, # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response) # click popup: the row's response
  )
# Interactive map of the infant mortality records: one circle marker per row.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_infant_mortality,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr, # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response) # click popup: the row's response
  )
The map above shows the locations across the US for which infant mortality data were recorded.
# Per location, count the records where the infant is reported as not
# currently alive, and format the result as a table.
# NOTE(review): this counts matching rows, not individual deaths — confirm
# that one row corresponds to one death before reading it as a death total.
infant_deaths <- cleaned_infant_mortality |>
  filter(
    question == "Indicator of infant currently alive",
    response == "NO"
  ) |>
  count(location_desc, name = "total_infant_deaths") |>
  kable()
# Evaluate the kable at top level rather than calling print(): print() writes
# the table as raw text output, whereas knitr renders a top-level kable as a
# formatted table.
infant_deaths
##
##
## |location_desc | total_infant_deaths|
## |:------------------------|-------------------:|
## |Alaska | 45|
## |Arkansas | 45|
## |Colorado | 47|
## |Delaware | 40|
## |Georgia | 43|
## |Hawaii | 45|
## |Illinois | 47|
## |Maine | 42|
## |Maryland | 45|
## |Massachusetts | 44|
## |Michigan | 43|
## |Minnesota | 41|
## |Missouri | 42|
## |Nebraska | 45|
## |New Jersey | 39|
## |New York (excluding NYC) | 47|
## |New York City | 47|
## |North Carolina | 47|
## |Ohio | 46|
## |Oklahoma | 47|
## |Oregon | 46|
## |Pennsylvania | 3|
## |Rhode Island | 46|
## |South Carolina | 47|
## |South Dakota | 43|
## |Utah | 47|
## |Vermont | 47|
## |Washington | 43|
## |West Virginia | 47|
## |Wisconsin | 40|
## |Wyoming | 43|
The table summarizes total infant deaths by location, with each row
representing one state or reporting area. The `location_desc` column
gives the location, and the `total_infant_deaths` column gives the
corresponding number of infant deaths recorded there. The counts vary
across regions: Pennsylvania is a clear outlier with a notably lower
count of 3, while locations such as Alaska and Arkansas (45 each) and
several others (up to 47) have higher counts. Apart from Pennsylvania,
every location falls within the 35 to 50 range. This summary provides an
overview of the distribution of infant deaths across various
geographical locations.
# Infant mortality records answered "NO" to "currently alive", restricted to
# the selected maternal race/ethnicity breakouts.
# NOTE(review): "Non-hispanic" is capitalised differently from
# "White, non-Hispanic" — confirm this value actually occurs in `break_out`.
filtered_mortality_race <- cleaned_infant_mortality |>
  filter(
    break_out_category == "Maternal Race/Ethnicity",
    break_out %in% c("Hispanic", "Non-hispanic", "White, non-Hispanic"),
    question == "Indicator of infant currently alive",
    response == "NO"
  )
print(filtered_mortality_race)
## # A tibble: 53 × 23
## year location_abbr location_desc class topic question data_source response
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2007 UT Utah Infant… Preg… Indicat… PRAMS NO
## 2 2007 OR Oregon Infant… Preg… Indicat… PRAMS NO
## 3 2007 WA Washington Infant… Preg… Indicat… PRAMS NO
## 4 2007 YC New York City Infant… Preg… Indicat… PRAMS NO
## 5 2007 OH Ohio Infant… Preg… Indicat… PRAMS NO
## 6 2007 ME Maine Infant… Preg… Indicat… PRAMS NO
## 7 2007 MD Maryland Infant… Preg… Indicat… PRAMS NO
## 8 2007 ME Maine Infant… Preg… Indicat… PRAMS NO
## 9 2007 MA Massachusetts Infant… Preg… Indicat… PRAMS NO
## 10 2007 IL Illinois Infant… Preg… Indicat… PRAMS NO
## # ℹ 43 more rows
## # ℹ 15 more variables: data_value <dbl>, low_confidence_limit <dbl>,
## # high_confidence_limit <dbl>, sample_size <dbl>, break_out <chr>,
## # break_out_category <chr>, latitude <dbl>, longitude <dbl>, class_id <chr>,
## # topic_id <chr>, question_id <chr>, location_id <dbl>, break_out_id <chr>,
## # break_out_categoryid <chr>, response_id <chr>
# Open the filtered table in the data viewer for a manual check.
view(filtered_mortality_race)
# Bar chart of the number of filtered records per ethnicity breakout, with a
# fixed colour for each breakout value.
plot_infant_deaths <- ggplot(
  filtered_mortality_race,
  aes(x = break_out, fill = break_out)
) +
  geom_bar() +
  labs(
    title = "Infant Deaths by Ethnicity",
    x = "Ethnicity",
    y = "Total Infant Deaths"
  ) +
  scale_fill_manual(
    values = c(
      "Hispanic" = "red",
      "Non-hispanic" = "blue",
      "White, non-Hispanic" = "green"
    )
  ) +
  theme_minimal()
# Assigning a ggplot object does not draw it; print it so the chart appears.
print(plot_infant_deaths)
The `plot_infant_deaths` chart above shows infant deaths categorized by
whether the mother was Hispanic or not. The graph shows that the
non-Hispanic group had a higher infant death count than the Hispanic
group.